library(nlme)
library(ggplot2)
library(multcomp)
library(emmeans)
library(rstatix)
library(ggpubr)
require(tidyverse)
library(pheatmap)
require(RColorBrewer)

color_panel<-c("#e35d6a","#5bb75b","#428bca","#e87810","#23496b","#ffbf00","#cc2028","#039748","pink","darkgray")
names(color_panel)<-sort(c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A"))

set.seed(189672)
theme_point<-theme_bw()+theme(strip.background = element_blank(), panel.background = element_rect(colour = "black", size=0.3))

theme_point2<-theme(panel.background = element_rect(colour = "black", size=0.3), strip.background = element_blank())

1 Introduction

For every metric used to compare kit-tube combinations, we tested the possibility of interaction. We build a linear mixed-effects model with tube, RNA isolation kit and timelapse as fixed effects and donorID as random effect. The heteroscedasticity introduced by different RNAisolation kits was taken into account.

2 Annotation

3 Sequencing and preprocessing

3.1 Reads

r_stat <- read.csv('./data_output/reads_melt_pre.csv',sep=",",header = TRUE)

r_stat$Tube<-as.factor(r_stat$Tube)
r_stat$RNAisolation<-as.factor(r_stat$RNAisolation)
r_stat$TimeLapse<-as.factor(r_stat$TimeLapse)
r_stat$donorID<-as.factor(r_stat$donorID)

Tube.levels<-levels(r_stat$Tube)
RNAisolation.levels<-levels(r_stat$RNAisolation)
TimeLapse.levels<-levels(r_stat$TimeLapse)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

m1<-lme(reads~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=r_stat)

anova(m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68 1144.7625  <.0001
## Tube                            2    68    0.6097  0.5464
## RNAisolation                    1    68   14.2283  0.0003
## TimeLapse                       2    68    0.4590  0.6339
## Tube:RNAisolation               2    68    0.9622  0.3872
## Tube:TimeLapse                  4    68    0.3651  0.8327
## RNAisolation:TimeLapse          2    68    0.3767  0.6875
## Tube:RNAisolation:TimeLapse     4    68    0.2433  0.9128
anova_m1 <- round(anova(m1)[c(5, 6, 7), c(4)], digits = 3)

Only RNAisolation kit has a significant effect, there are no interactions.

The normality of the residuals is checked.

qqnorm(m1$residuals)
qqline(m1$residuals)

3.2 Kallisto alignment score

align_stat <- read.csv('./data_output/kallisto_aligned.csv',sep=",",header = TRUE)

align_stat$Tube<-as.factor(align_stat$Tube)
align_stat$RNAisolation<-as.factor(align_stat$RNAisolation)
align_stat$TimeLapse<-as.factor(align_stat$TimeLapse)
align_stat$donorID<-as.factor(align_stat$donorID)

Tube.levels<-levels(align_stat$Tube)
RNAisolation.levels<-levels(align_stat$RNAisolation)
TimeLapse.levels<-levels(align_stat$TimeLapse)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

align_m1<-lme(percent_aligned~Tube*RNAisolation*TimeLapse,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=align_stat)

anova_align_m1 <- round(anova(align_m1)[c(5, 6, 7), c(4)], digits = 3) 
anova(align_m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68 2591.7871  <.0001
## Tube                            2    68   15.6366  <.0001
## RNAisolation                    1    68    1.3057  0.2572
## TimeLapse                       2    68    0.5747  0.5656
## Tube:RNAisolation               2    68    7.3842  0.0013
## Tube:TimeLapse                  4    68    1.8605  0.1275
## RNAisolation:TimeLapse          2    68    1.7693  0.1782
## Tube:RNAisolation:TimeLapse     4    68    1.6238  0.1783

There seems to be an interaction between tube and RNA isolation kit. There is no third-order interaction with time. Next, we look at second-order interactions:

align_m2<-lme(percent_aligned~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=align_stat)

anova(align_m2)
##                        numDF denDF   F-value p-value
## (Intercept)                1    72 2589.3099  <.0001
## Tube                       2    72   14.9891  <.0001
## RNAisolation               1    72    1.2600  0.2654
## TimeLapse                  2    72    0.5656  0.5705
## Tube:RNAisolation          2    72    7.1253  0.0015
## Tube:TimeLapse             4    72    1.7972  0.1388
## RNAisolation:TimeLapse     2    72    1.7073  0.1886

Again, we see an interaction between tube and kit. Finally, we only include the tube-kit interaction in the model.

align_m3<-lme(percent_aligned~Tube+RNAisolation+TimeLapse+
             Tube:RNAisolation,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=align_stat)

anova(align_m3)
##                   numDF denDF   F-value p-value
## (Intercept)           1    78 2581.7144  <.0001
## Tube                  2    78   13.8071  <.0001
## RNAisolation          1    78    1.1834  0.2800
## TimeLapse             2    78    0.5650  0.5707
## Tube:RNAisolation     2    78    6.6926  0.0021

The normality of the residuals is checked.

qqnorm(align_m3$residuals)
qqline(align_m3$residuals)

For both RNA isolation methods, the effect of tube is investigated. The data is collapsed over time.

align_m.methods<-emmeans(align_m3, ~Tube|RNAisolation)
plot(align_m.methods, comparisons=TRUE)

The arrows in the plot originate in the estimated marginal mean of that group, the length is an approximation of the margin of error if compared with the marginal mean of the closest other group. The arrow is one-sided if there is no other group with a smaller (to the left) or bigger (to the right) marginal mean to compare with. If the arrows of two groups overlap, it means that the margin of error is bigger than the difference. If there is no overlap, which we see in the serum tube group if extracted with the QIAamp kit, it means that the difference is bigger than the margin or error. Thus, there is a significant difference.

pairs(align_m.methods)
## RNAisolation = miRNeasySPkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     0.799 1.05 78   0.759  0.7294
##  Citrate - serum    2.131 1.05 78   2.023  0.1135
##  EDTA - serum       1.332 1.05 78   1.264  0.4196
## 
## RNAisolation = QIAamp:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     0.841 1.39 78   0.607  0.8168
##  Citrate - serum    7.669 1.39 78   5.532  <.0001
##  EDTA - serum       6.828 1.39 78   4.925  <.0001
## 
## Results are averaged over the levels of: TimeLapse 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(align_m.methods, adjust="mvt")
## RNAisolation = miRNeasySPkit:
##  Tube    emmean   SE df lower.CL upper.CL
##  Citrate   76.5 1.63  4     71.0     82.0
##  EDTA      75.7 1.63  4     70.2     81.2
##  serum     74.4 1.63  4     68.9     79.9
## 
## RNAisolation = QIAamp:
##  Tube    emmean   SE df lower.CL upper.CL
##  Citrate   79.1 1.75  4     73.0     85.3
##  EDTA      78.3 1.75  4     72.2     84.4
##  serum     71.5 1.75  4     65.3     77.6
## 
## Results are averaged over the levels of: TimeLapse 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

We can conclude that the alignment percentage is significantly lower in the serum tube compared to the other tubes, but only if extracted with the QIAamp kit.

# ANOVA test between tubes of same kit
align_stats <- align_stat %>% group_by(RNAisolation, Tube) %>% get_summary_stats(percent_aligned, type = "mean_sd")
align_tube_effect <- align_stat %>% group_by(RNAisolation) %>% anova_test(percent_aligned ~ Tube)
align_kit_effect <- align_stat %>% group_by(Tube) %>% anova_test(percent_aligned ~ RNAisolation)

# Pairwise comparisons
align_pwc <- align_stat %>% group_by(RNAisolation) %>% pairwise_t_test(percent_aligned ~ Tube) %>% add_xy_position(x = "Tube")
align_pwc_time <- align_stat %>% group_by(RNAisolation, TimeLapse) %>% pairwise_t_test(percent_aligned ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
ggplot(align_stat, aes(x = Tube, y = percent_aligned, color = Tube)) +
  geom_boxplot() +
  geom_point() +
  scale_colour_manual(values=color_panel) +
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(align_pwc, label = "p.signif") +
  labs(title = "tube comparison within kits (T test)")

#ggsave("./data_output/plots/duplicates_stats.pdf", plot = ggplot2::last_plot(), dpi = 300)

3.3 Duplicate rate

dupl_stat<-read.csv("./data_output/duplicates_complete.csv",sep=",",header = TRUE)

dupl_stat$Tube<-as.factor(dupl_stat$Tube)
dupl_stat$RNAisolation<-as.factor(dupl_stat$RNAisolation)
dupl_stat$TimeLapse<-as.factor(dupl_stat$TimeLapse)
dupl_stat$donorID<-as.factor(dupl_stat$donorID)

Tube.levels<-levels(dupl_stat$Tube)
RNAisolation.levels<-levels(dupl_stat$RNAisolation)
TimeLapse.levels<-levels(dupl_stat$TimeLapse)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

#dupl_m <- aov(duplicates ~ RNAisolation*Tube*TimeLapse, dupl_stat)
#summary(dupl_m)
#dupl_m_RM <- aov(duplicates ~ RNAisolation*Tube*TimeLapse + Error(donorID), dupl_stat)
#summary(dupl_m_RM)

dupl_m1<-lme(duplicates~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=dupl_stat)

anova_dupl_m1 <- round(anova(dupl_m1)[c(5, 6, 7), c(4)], digits = 3)
anova(dupl_m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68 143322.29  <.0001
## Tube                            2    68      0.28  0.7571
## RNAisolation                    1    68    285.44  <.0001
## TimeLapse                       2    68      1.13  0.3276
## Tube:RNAisolation               2    68      7.51  0.0011
## Tube:TimeLapse                  4    68      2.51  0.0499
## RNAisolation:TimeLapse          2    68      0.98  0.3823
## Tube:RNAisolation:TimeLapse     4    68      0.84  0.5058

The model indicates that there are interaction effects with Tube. Thus the effect of RNA isolation method is not the same for all tubes, and the effect of time is not the same for all tubes. However, there is no significant third-order interaction. Next, we look at second-order interactions:

dupl_m2<-lme(duplicates~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=dupl_stat)

anova(dupl_m2)
##                        numDF denDF   F-value p-value
## (Intercept)                1    72 143067.36  <.0001
## Tube                       2    72      0.29  0.7485
## RNAisolation               1    72    289.24  <.0001
## TimeLapse                  2    72      1.15  0.3239
## Tube:RNAisolation          2    72      7.61  0.0010
## Tube:TimeLapse             4    72      2.53  0.0480
## RNAisolation:TimeLapse     2    72      0.99  0.3772

Again, we see interaction effects with Tube. Finally, we only include the tube-kit and tube-time effect in the model.

dupl_m3<-lme(duplicates~Tube+RNAisolation+TimeLapse+
             Tube:RNAisolation+Tube:TimeLapse,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=dupl_stat)

anova(dupl_m3)
##                   numDF denDF   F-value p-value
## (Intercept)           1    74 143058.62  <.0001
## Tube                  2    74      0.29  0.7482
## RNAisolation          1    74    289.37  <.0001
## TimeLapse             2    74      1.15  0.3236
## Tube:RNAisolation     2    74      7.62  0.0010
## Tube:TimeLapse        4    74      2.53  0.0477

A quick check of the normality of the residuals. There seem to be some deviation in the tails. However, the methods used here are quite robust against such deviations.

qqnorm(dupl_m3$residuals)
qqline(dupl_m3$residuals)

For all combinations of tube and RNA isolation method, the effect of time is investigated. Tukey’s method for multiplicity correction is applied separately for each combination of tube and RNA isolation method. Hardly any significant time effects are detected. If multiple testing correction would have been applied to all test simultaneously, no significant results would have been left.

Note that this analysis is based on the method that includes all interaction terms, whereas a previous analysis has suggested that there is only evidence for interaction effects with tube. We believe it is still better to do this analysis under a less restrictive model (as we have done here).

dupl_m.time<-emmeans(dupl_m1, ~TimeLapse|Tube:RNAisolation)
plot(dupl_m.time, comparisons=TRUE)

pairs(dupl_m.time)
## Tube = Citrate, RNAisolation = miRNeasySPkit:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04  -0.00850 0.00532 68  -1.598  0.2536
##  T0 - T16  -0.00036 0.00532 68  -0.068  0.9975
##  T04 - T16  0.00814 0.00532 68   1.530  0.2834
## 
## Tube = EDTA, RNAisolation = miRNeasySPkit:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04   0.00096 0.00532 68   0.180  0.9822
##  T0 - T16   0.00880 0.00532 68   1.654  0.2304
##  T04 - T16  0.00784 0.00532 68   1.474  0.3099
## 
## Tube = serum, RNAisolation = miRNeasySPkit:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04   0.00036 0.00532 68   0.068  0.9975
##  T0 - T16  -0.00552 0.00532 68  -1.038  0.5559
##  T04 - T16 -0.00588 0.00532 68  -1.105  0.5142
## 
## Tube = Citrate, RNAisolation = QIAamp:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04  -0.01246 0.01375 68  -0.906  0.6385
##  T0 - T16   0.00650 0.01375 68   0.473  0.8844
##  T04 - T16  0.01896 0.01375 68   1.378  0.3578
## 
## Tube = EDTA, RNAisolation = QIAamp:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04   0.02692 0.01375 68   1.957  0.1308
##  T0 - T16   0.03546 0.01375 68   2.578  0.0321
##  T04 - T16  0.00854 0.01375 68   0.621  0.8092
## 
## Tube = serum, RNAisolation = QIAamp:
##  contrast  estimate      SE df t.ratio p.value
##  T0 - T04  -0.00762 0.01375 68  -0.554  0.8448
##  T0 - T16  -0.00362 0.01375 68  -0.263  0.9626
##  T04 - T16  0.00400 0.01375 68   0.291  0.9545
## 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(dupl_m.time,adjust="mvt")
## Tube = Citrate, RNAisolation = miRNeasySPkit:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.954 0.00437  4    0.938    0.970
##  T04        0.962 0.00437  4    0.946    0.979
##  T16        0.954 0.00437  4    0.938    0.970
## 
## Tube = EDTA, RNAisolation = miRNeasySPkit:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.960 0.00437  4    0.944    0.976
##  T04        0.959 0.00437  4    0.943    0.976
##  T16        0.951 0.00437  4    0.935    0.968
## 
## Tube = serum, RNAisolation = miRNeasySPkit:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.953 0.00437  4    0.937    0.969
##  T04        0.952 0.00437  4    0.936    0.969
##  T16        0.958 0.00437  4    0.942    0.975
## 
## Tube = Citrate, RNAisolation = QIAamp:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.881 0.00997  4    0.844    0.919
##  T04        0.894 0.00997  4    0.857    0.931
##  T16        0.875 0.00997  4    0.838    0.912
## 
## Tube = EDTA, RNAisolation = QIAamp:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.916 0.00997  4    0.878    0.953
##  T04        0.889 0.00997  4    0.852    0.926
##  T16        0.880 0.00997  4    0.843    0.918
## 
## Tube = serum, RNAisolation = QIAamp:
##  TimeLapse emmean      SE df lower.CL upper.CL
##  T0         0.910 0.00997  4    0.872    0.947
##  T04        0.918 0.00997  4    0.880    0.955
##  T16        0.914 0.00997  4    0.876    0.951
## 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

Next we compare all methods (i.e. combination of tube and RNA isolation method) in a pairwise fashion, but for each time point separately. Again Tukey’s multiple testing method is used, for each time point separately.

dupl_m.methods<-emmeans(dupl_m1, ~Tube*RNAisolation|TimeLapse)
plot(dupl_m.methods, comparisons=TRUE)

pairs(dupl_m.methods)
## TimeLapse = T0:
##  contrast                                    estimate      SE df t.ratio
##  Citrate miRNeasySPkit - EDTA miRNeasySPkit  -0.00646 0.00532 68  -1.214
##  Citrate miRNeasySPkit - serum miRNeasySPkit  0.00104 0.00532 68   0.195
##  Citrate miRNeasySPkit - Citrate QIAamp       0.07238 0.01043 68   6.941
##  Citrate miRNeasySPkit - EDTA QIAamp          0.03800 0.01043 68   3.644
##  Citrate miRNeasySPkit - serum QIAamp         0.04390 0.01043 68   4.210
##  EDTA miRNeasySPkit - serum miRNeasySPkit     0.00750 0.00532 68   1.410
##  EDTA miRNeasySPkit - Citrate QIAamp          0.07884 0.01043 68   7.560
##  EDTA miRNeasySPkit - EDTA QIAamp             0.04446 0.01043 68   4.264
##  EDTA miRNeasySPkit - serum QIAamp            0.05036 0.01043 68   4.829
##  serum miRNeasySPkit - Citrate QIAamp         0.07134 0.01043 68   6.841
##  serum miRNeasySPkit - EDTA QIAamp            0.03696 0.01043 68   3.544
##  serum miRNeasySPkit - serum QIAamp           0.04286 0.01043 68   4.110
##  Citrate QIAamp - EDTA QIAamp                -0.03438 0.01375 68  -2.500
##  Citrate QIAamp - serum QIAamp               -0.02848 0.01375 68  -2.071
##  EDTA QIAamp - serum QIAamp                   0.00590 0.01375 68   0.429
##  p.value
##   0.8284
##   1.0000
##   <.0001
##   0.0066
##   0.0010
##   0.7211
##   <.0001
##   0.0009
##   0.0001
##   <.0001
##   0.0090
##   0.0015
##   0.1387
##   0.3149
##   0.9981
## 
## TimeLapse = T04:
##  contrast                                    estimate      SE df t.ratio
##  Citrate miRNeasySPkit - EDTA miRNeasySPkit   0.00300 0.00532 68   0.564
##  Citrate miRNeasySPkit - serum miRNeasySPkit  0.00990 0.00532 68   1.861
##  Citrate miRNeasySPkit - Citrate QIAamp       0.06842 0.01043 68   6.561
##  Citrate miRNeasySPkit - EDTA QIAamp          0.07342 0.01043 68   7.041
##  Citrate miRNeasySPkit - serum QIAamp         0.04478 0.01043 68   4.294
##  EDTA miRNeasySPkit - serum miRNeasySPkit     0.00690 0.00532 68   1.297
##  EDTA miRNeasySPkit - Citrate QIAamp          0.06542 0.01043 68   6.274
##  EDTA miRNeasySPkit - EDTA QIAamp             0.07042 0.01043 68   6.753
##  EDTA miRNeasySPkit - serum QIAamp            0.04178 0.01043 68   4.007
##  serum miRNeasySPkit - Citrate QIAamp         0.05852 0.01043 68   5.612
##  serum miRNeasySPkit - EDTA QIAamp            0.06352 0.01043 68   6.091
##  serum miRNeasySPkit - serum QIAamp           0.03488 0.01043 68   3.345
##  Citrate QIAamp - EDTA QIAamp                 0.00500 0.01375 68   0.364
##  Citrate QIAamp - serum QIAamp               -0.02364 0.01375 68  -1.719
##  EDTA QIAamp - serum QIAamp                  -0.02864 0.01375 68  -2.082
##  p.value
##   0.9930
##   0.4348
##   <.0001
##   <.0001
##   0.0008
##   0.7857
##   <.0001
##   <.0001
##   0.0021
##   <.0001
##   <.0001
##   0.0162
##   0.9991
##   0.5245
##   0.3088
## 
## TimeLapse = T16:
##  contrast                                    estimate      SE df t.ratio
##  Citrate miRNeasySPkit - EDTA miRNeasySPkit   0.00270 0.00532 68   0.508
##  Citrate miRNeasySPkit - serum miRNeasySPkit -0.00412 0.00532 68  -0.774
##  Citrate miRNeasySPkit - Citrate QIAamp       0.07924 0.01043 68   7.599
##  Citrate miRNeasySPkit - EDTA QIAamp          0.07382 0.01043 68   7.079
##  Citrate miRNeasySPkit - serum QIAamp         0.04064 0.01043 68   3.897
##  EDTA miRNeasySPkit - serum miRNeasySPkit    -0.00682 0.00532 68  -1.282
##  EDTA miRNeasySPkit - Citrate QIAamp          0.07654 0.01043 68   7.340
##  EDTA miRNeasySPkit - EDTA QIAamp             0.07112 0.01043 68   6.820
##  EDTA miRNeasySPkit - serum QIAamp            0.03794 0.01043 68   3.638
##  serum miRNeasySPkit - Citrate QIAamp         0.08336 0.01043 68   7.994
##  serum miRNeasySPkit - EDTA QIAamp            0.07794 0.01043 68   7.474
##  serum miRNeasySPkit - serum QIAamp           0.04476 0.01043 68   4.292
##  Citrate QIAamp - EDTA QIAamp                -0.00542 0.01375 68  -0.394
##  Citrate QIAamp - serum QIAamp               -0.03860 0.01375 68  -2.806
##  EDTA QIAamp - serum QIAamp                  -0.03318 0.01375 68  -2.412
##  p.value
##   0.9957
##   0.9709
##   <.0001
##   <.0001
##   0.0030
##   0.7938
##   <.0001
##   <.0001
##   0.0067
##   <.0001
##   <.0001
##   0.0008
##   0.9987
##   0.0685
##   0.1666
## 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 6 estimates
confint(dupl_m.methods, adjust="mvt")
## TimeLapse = T0:
##  Tube    RNAisolation  emmean      SE df lower.CL upper.CL
##  Citrate miRNeasySPkit  0.954 0.00437  4    0.935    0.973
##  EDTA    miRNeasySPkit  0.960 0.00437  4    0.941    0.979
##  serum   miRNeasySPkit  0.953 0.00437  4    0.934    0.972
##  Citrate QIAamp         0.881 0.00997  4    0.838    0.925
##  EDTA    QIAamp         0.916 0.00997  4    0.872    0.959
##  serum   QIAamp         0.910 0.00997  4    0.867    0.953
## 
## TimeLapse = T04:
##  Tube    RNAisolation  emmean      SE df lower.CL upper.CL
##  Citrate miRNeasySPkit  0.962 0.00437  4    0.943    0.981
##  EDTA    miRNeasySPkit  0.959 0.00437  4    0.940    0.978
##  serum   miRNeasySPkit  0.952 0.00437  4    0.933    0.971
##  Citrate QIAamp         0.894 0.00997  4    0.851    0.937
##  EDTA    QIAamp         0.889 0.00997  4    0.846    0.932
##  serum   QIAamp         0.918 0.00997  4    0.874    0.961
## 
## TimeLapse = T16:
##  Tube    RNAisolation  emmean      SE df lower.CL upper.CL
##  Citrate miRNeasySPkit  0.954 0.00437  4    0.935    0.973
##  EDTA    miRNeasySPkit  0.951 0.00437  4    0.933    0.970
##  serum   miRNeasySPkit  0.958 0.00437  4    0.939    0.977
##  Citrate QIAamp         0.875 0.00997  4    0.832    0.918
##  EDTA    QIAamp         0.880 0.00997  4    0.837    0.924
##  serum   QIAamp         0.914 0.00997  4    0.870    0.957
## 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 6 estimates

Compare for each timepoint-kit combination separately

dupl_m.time_kit<-emmeans(dupl_m1, ~Tube|RNAisolation:TimeLapse )
plot(dupl_m.time_kit, comparisons=TRUE)

pairs(dupl_m.time_kit)
## RNAisolation = miRNeasySPkit, TimeLapse = T0:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA  -0.00646 0.00532 68  -1.214  0.4489
##  Citrate - serum  0.00104 0.00532 68   0.195  0.9792
##  EDTA - serum     0.00750 0.00532 68   1.410  0.3416
## 
## RNAisolation = QIAamp, TimeLapse = T0:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA  -0.03438 0.01375 68  -2.500  0.0390
##  Citrate - serum -0.02848 0.01375 68  -2.071  0.1036
##  EDTA - serum     0.00590 0.01375 68   0.429  0.9037
## 
## RNAisolation = miRNeasySPkit, TimeLapse = T04:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA   0.00300 0.00532 68   0.564  0.8397
##  Citrate - serum  0.00990 0.00532 68   1.861  0.1580
##  EDTA - serum     0.00690 0.00532 68   1.297  0.4018
## 
## RNAisolation = QIAamp, TimeLapse = T04:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA   0.00500 0.01375 68   0.364  0.9298
##  Citrate - serum -0.02364 0.01375 68  -1.719  0.2056
##  EDTA - serum    -0.02864 0.01375 68  -2.082  0.1011
## 
## RNAisolation = miRNeasySPkit, TimeLapse = T16:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA   0.00270 0.00532 68   0.508  0.8679
##  Citrate - serum -0.00412 0.00532 68  -0.774  0.7199
##  EDTA - serum    -0.00682 0.00532 68  -1.282  0.4102
## 
## RNAisolation = QIAamp, TimeLapse = T16:
##  contrast        estimate      SE df t.ratio p.value
##  Citrate - EDTA  -0.00542 0.01375 68  -0.394  0.9181
##  Citrate - serum -0.03860 0.01375 68  -2.806  0.0177
##  EDTA - serum    -0.03318 0.01375 68  -2.412  0.0481
## 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(dupl_m.time_kit, adjust="mvt")
## RNAisolation = miRNeasySPkit, TimeLapse = T0:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.954 0.00437  4    0.938    0.970
##  EDTA     0.960 0.00437  4    0.944    0.976
##  serum    0.953 0.00437  4    0.937    0.969
## 
## RNAisolation = QIAamp, TimeLapse = T0:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.881 0.00997  4    0.844    0.919
##  EDTA     0.916 0.00997  4    0.878    0.953
##  serum    0.910 0.00997  4    0.873    0.947
## 
## RNAisolation = miRNeasySPkit, TimeLapse = T04:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.962 0.00437  4    0.946    0.979
##  EDTA     0.959 0.00437  4    0.943    0.976
##  serum    0.952 0.00437  4    0.936    0.969
## 
## RNAisolation = QIAamp, TimeLapse = T04:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.894 0.00997  4    0.857    0.931
##  EDTA     0.889 0.00997  4    0.852    0.926
##  serum    0.918 0.00997  4    0.880    0.955
## 
## RNAisolation = miRNeasySPkit, TimeLapse = T16:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.954 0.00437  4    0.938    0.970
##  EDTA     0.951 0.00437  4    0.935    0.968
##  serum    0.958 0.00437  4    0.942    0.974
## 
## RNAisolation = QIAamp, TimeLapse = T16:
##  Tube    emmean      SE df lower.CL upper.CL
##  Citrate  0.875 0.00997  4    0.838    0.912
##  EDTA     0.880 0.00997  4    0.843    0.918
##  serum    0.914 0.00997  4    0.876    0.951
## 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

We can conclude there is a small tube-kit and tube-time interaction; the serum tube at timepoint 16h seems to have more duplicates when extracted with the QIAamp kit. However, the difference is negligible.

Dupl_m.kit<-emmeans(dupl_m1, ~Tube|RNAisolation)
## NOTE: Results may be misleading due to involvement in interactions
p_val.kit_dupl <- formatC(summary(pairs(Dupl_m.kit))$p.value, format="e", 2)

Dupl_pwc.kit <-  dupl_stat %>% group_by(RNAisolation) %>% pairwise_t_test(duplicates ~ Tube) %>% add_xy_position(x = "Tube")
Dupl_pwc.kit$p.adj <- p_val.kit_dupl
Dupl_pwc.kit$p.adj <- as.numeric(Dupl_pwc.kit$p.adj)

Dupl_pwc.kit$p.adj.signif <- ifelse(Dupl_pwc.kit$p.adj <= 0.001, "***", ifelse(Dupl_pwc.kit$p.adj<=0.01, "**", ifelse(Dupl_pwc.kit$p.adj<=0.05, "*", "ns")))

Dupl_pwc.kit
## # A tibble: 6 × 14
##   RNAisolation  .y.        group1  group2    n1    n2       p p.signif    p.adj
##   <fct>         <chr>      <chr>   <chr>  <int> <int>   <dbl> <chr>       <dbl>
## 1 miRNeasySPkit duplicates Citrate EDTA      15    15 0.943   ns       0.996   
## 2 miRNeasySPkit duplicates Citrate serum     15    15 0.52    ns       0.741   
## 3 miRNeasySPkit duplicates EDTA    serum     15    15 0.475   ns       0.69    
## 4 QIAamp        duplicates Citrate EDTA      15    15 0.184   ns       0.316   
## 5 QIAamp        duplicates Citrate serum     15    15 0.00106 **       0.000876
## 6 QIAamp        duplicates EDTA    serum     15    15 0.0359  *        0.0561  
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## #   groups <named list>, xmin <dbl>, xmax <dbl>
Dupl.kit_plot<-ggplot(dupl_stat, aes(x = Tube, y = duplicates, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(RNAisolation)) +
  stat_pvalue_manual(Dupl_pwc.kit, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "duplication")

Dupl.kit_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

Dupl_m.time<-emmeans(dupl_m1, ~Tube|TimeLapse)
## NOTE: Results may be misleading due to involvement in interactions
p_val.time_dupl <- formatC(summary(pairs(Dupl_m.time))$p.value, format="e", 2)

Dupl_pwc.time <-  dupl_stat %>% group_by(TimeLapse) %>% pairwise_t_test(duplicates ~ Tube) %>% add_xy_position(x = "Tube")
Dupl_pwc.time$p.adj <- p_val.time_dupl
Dupl_pwc.time$p.adj <- as.numeric(Dupl_pwc.time$p.adj)
Dupl_pwc.time$y.position[9]<-0.98

Dupl_pwc.time$p.adj.signif <- ifelse(Dupl_pwc.time$p.adj <= 0.001, "***", ifelse(Dupl_pwc.time$p.adj<=0.01, "**", ifelse(Dupl_pwc.time$p.adj<=0.05, "*", "ns")))

Dupl_pwc.time
## # A tibble: 9 × 14
##   TimeLapse .y.     group1 group2    n1    n2     p p.signif  p.adj p.adj.signif
##   <fct>     <chr>   <chr>  <chr>  <int> <int> <dbl> <chr>     <dbl> <chr>       
## 1 T0        duplic… Citra… EDTA      10    10 0.189 ns       0.0196 *           
## 2 T0        duplic… Citra… serum     10    10 0.373 ns       0.158  ns          
## 3 T0        duplic… EDTA   serum     10    10 0.662 ns       0.637  ns          
## 4 T04       duplic… Citra… EDTA      10    10 0.801 ns       0.851  ns          
## 5 T04       duplic… Citra… serum     10    10 0.666 ns       0.622  ns          
## 6 T04       duplic… EDTA   serum     10    10 0.495 ns       0.31   ns          
## 7 T16       duplic… Citra… EDTA      10    10 0.938 ns       0.981  ns          
## 8 T16       duplic… Citra… serum     10    10 0.23  ns       0.0139 *           
## 9 T16       duplic… EDTA   serum     10    10 0.26  ns       0.0227 *           
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
Dupl.time_plot<-ggplot(dupl_stat, aes(x = Tube, y = duplicates, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(TimeLapse)) +
  stat_pvalue_manual(Dupl_pwc.time, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "duplication")

Dupl.time_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

# # tried to make heatmap for p-values but result is not ok; should be left out
# m_dupl.methods<-emmeans(m_dupl, ~Tube*RNAisolation|TimeLapse )
# plot(m_dupl.methods, comparisons=TRUE)
# pairs(m_dupl.methods)
# confint(m_dupl.methods, adjust="mvt")
# 
# # subset duplicates for T0
# dupl_T0 <- subset(duplicates, TimeLapse == "T0")
# m_dupl_T0 <-lme(duplicates~Tube*RNAisolation,
#        random=~1|donorID,
#        weights = varIdent(form = ~ 1 | RNAisolation),
#        data=dupl_T0)
# 
# m_dupl_T0.methods<-emmeans(m_dupl_T0, ~Tube*RNAisolation)
# 
# # make heatmap of p-values for T0
# m_dupl_T0pwp <- pwpm(m_dupl_T0.methods)
# m_dupl_T0pwp <- sub("[<>]", "", m_dupl_T0pwp)
# p_dupl_T0.matx<-matrix(as.numeric((m_dupl_T0pwp)),nrow = length(m_dupl_T0pwp[,1]),ncol = length(m_dupl_T0pwp[,1]))
# rownames(p_dupl_T0.matx) <- colnames(p_dupl_T0.matx) <- rownames(m_dupl_T0pwp)
# #p_dupl_T0.matx[lower.tri(p_dupl_T0.matx, diag=TRUE)] <- NA
# #melt(p_dupl_T0.matx) %>%
# #  ggplot(aes(Var1, Var2, fill = value)) + geom_tile() +
# #  geom_text(aes(label = value))
# 
# allcolors = colorRampPalette(brewer.pal(8,"Blues"))(100)
# colorscale = c(allcolors,allcolors)
# 
# corrplot(p_dupl_T0.matx, method = 'shade', type = 'upper',
#          diag = FALSE,
#          tl.srt = 45, #text labels rotated 45 degrees
#          number.cex=0.8, 
#          addCoef.col = "black", # Add p-value
#          tl.col = "black", #text label color
#          tl.cex = 0.8,
#          col=colorscale)

4 RNA concentration

4.1 Relative RNA conc in plasma: endogenous RNA vs Sequins

conc_stat <- read.csv("./data_output/gene_level_ratios.csv",sep=",",header = TRUE)

conc_stat$Tube<-as.factor(conc_stat$Tube)
conc_stat$RNAisolation<-as.factor(conc_stat$RNAisolation)
conc_stat$TimeLapse<-as.factor(conc_stat$TimeLapse)
conc_stat$donorID<-as.factor(conc_stat$donorID)
conc_stat <- conc_stat %>%
  mutate("EndovsERCC_InputCorr"= (endogenous/ERCC)*EluateV/PlasmaInputV,
         "SeqvsERCC_InputCorr" = (Sequin/ERCC)*EluateV/PlasmaInputV)

Tube.levels<-levels(conc_stat$Tube)
RNAisolation.levels<-levels(conc_stat$RNAisolation)
TimeLapse.levels<-levels(conc_stat$TimeLapse)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

EndovsSeq_m1<-lme(EndovsSeq~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsSeq_m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68  73.27944  <.0001
## Tube                            2    68   0.05347  0.9480
## RNAisolation                    1    68 255.52563  <.0001
## TimeLapse                       2    68   4.93571  0.0100
## Tube:RNAisolation               2    68   0.96579  0.3858
## Tube:TimeLapse                  4    68   5.93428  0.0004
## RNAisolation:TimeLapse          2    68   1.01592  0.3675
## Tube:RNAisolation:TimeLapse     4    68   1.46876  0.2213
anova_EndovsSeq_m1 <- round(anova(EndovsSeq_m1)[c(5, 6, 7), c(4)], digits = 3)

The model indicates that the effect of time is not the same for all tubes. There is no significant third-order interaction.

Next, we look at second-order interactions:

EndovsSeq_m2<-lme(EndovsSeq~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsSeq_m2)
##                        numDF denDF   F-value p-value
## (Intercept)                1    72  73.06510  <.0001
## Tube                       2    72   0.05419  0.9473
## RNAisolation               1    72 244.50070  <.0001
## TimeLapse                  2    72   4.90356  0.0101
## Tube:RNAisolation          2    72   0.92412  0.4015
## Tube:TimeLapse             4    72   5.88942  0.0004
## RNAisolation:TimeLapse     2    72   0.97209  0.3832

Again, we see a tube-time interaction. Finally, we only include the tube-time effect in the model.

EndovsSeq_m3<-lme(EndovsSeq~Tube+RNAisolation+TimeLapse+
             Tube:TimeLapse,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsSeq_m3)
##                numDF denDF   F-value p-value
## (Intercept)        1    76  73.08690  <.0001
## Tube               2    76   0.05411  0.9474
## RNAisolation       1    76 245.62068  <.0001
## TimeLapse          2    76   4.90680  0.0099
## Tube:TimeLapse     4    76   5.89395  0.0003

The normality of the residuals is checked.

qqnorm(EndovsSeq_m3$residuals)
qqline(EndovsSeq_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsSeq_m.time<-emmeans(EndovsSeq_m3, ~TimeLapse|Tube)
plot(EndovsSeq_m.time, comparisons=TRUE)

pairs(EndovsSeq_m.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    28.692 25.5 76   1.124  0.5022
##  T0 - T16   -42.534 25.5 76  -1.667  0.2247
##  T04 - T16  -71.226 25.5 76  -2.791  0.0180
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    -0.586 25.5 76  -0.023  0.9997
##  T0 - T16  -103.443 25.5 76  -4.053  0.0004
##  T04 - T16 -102.857 25.5 76  -4.030  0.0004
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04   -14.885 25.5 76  -0.583  0.8295
##  T0 - T16    33.208 25.5 76   1.301  0.3989
##  T04 - T16   48.093 25.5 76   1.884  0.1502
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(EndovsSeq_m.time,adjust="mvt")
## Tube = Citrate:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           526 37.7  4      398      655
##  T04          498 37.7  4      370      626
##  T16          569 37.7  4      441      697
## 
## Tube = EDTA:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           493 37.7  4      364      621
##  T04          493 37.7  4      365      622
##  T16          596 37.7  4      468      724
## 
## Tube = serum:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           533 37.7  4      404      661
##  T04          548 37.7  4      419      676
##  T16          500 37.7  4      371      628
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates
p_val.time_Conc <- formatC(summary(pairs(EndovsSeq_m.time))$p.value, format="e", 2)
Conc_pwc.time <-  conc_stat %>% group_by(TimeLapse) %>% pairwise_t_test(EndovsSeq ~ Tube) %>% add_xy_position(x = "Tube")
Conc_pwc.time$p.adj <- p_val.time_Conc
Conc_pwc.time$p.adj <- as.numeric(Conc_pwc.time$p.adj)
Conc_pwc.time$p.adj.signif <- ifelse(Conc_pwc.time$p.adj <= 0.001, "***", ifelse(Conc_pwc.time$p.adj<=0.01, "**", ifelse(Conc_pwc.time$p.adj<=0.05, "*", "ns")))
Conc_pwc.time
## # A tibble: 9 × 14
##   TimeLapse .y.    group1 group2    n1    n2     p p.signif   p.adj p.adj.signif
##   <fct>     <chr>  <chr>  <chr>  <int> <int> <dbl> <chr>      <dbl> <chr>       
## 1 T0        Endov… Citra… EDTA      10    10 0.534 ns       5.02e-1 ns          
## 2 T0        Endov… Citra… serum     10    10 0.544 ns       2.25e-1 ns          
## 3 T0        Endov… EDTA   serum     10    10 0.224 ns       1.8 e-2 *           
## 4 T04       Endov… Citra… EDTA      10    10 0.808 ns       1   e+0 ns          
## 5 T04       Endov… Citra… serum     10    10 0.392 ns       3.53e-4 ***         
## 6 T04       Endov… EDTA   serum     10    10 0.538 ns       3.82e-4 ***         
## 7 T16       Endov… Citra… EDTA      10    10 0.975 ns       8.29e-1 ns          
## 8 T16       Endov… Citra… serum     10    10 0.403 ns       3.99e-1 ns          
## 9 T16       Endov… EDTA   serum     10    10 0.421 ns       1.5 e-1 ns          
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
conc.Time_plot<-ggplot(conc_stat, aes(x = Tube, y = EndovsSeq, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(TimeLapse)) +
  stat_pvalue_manual(Conc_pwc.time, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "RNA concentration")

conc.Time_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

EndovsSeq_m.kit<-emmeans(EndovsSeq_m3, ~Tube|RNAisolation)
p_val.kit_Conc <- formatC(summary(pairs(EndovsSeq_m.kit))$p.value, format="e", 2)

Conc_pwc.kit <-  conc_stat %>% group_by(RNAisolation) %>% pairwise_t_test(EndovsSeq ~ Tube) %>% add_xy_position(x = "Tube")
Conc_pwc.kit$p.adj <- p_val.kit_Conc
Conc_pwc.kit$p.adj <- as.numeric(Conc_pwc.kit$p.adj)
Conc_pwc.kit$p.adj.signif <- ifelse(Conc_pwc.kit$p.adj <= 0.001, "***", ifelse(Conc_pwc.kit$p.adj<=0.01, "**", ifelse(Conc_pwc.kit$p.adj<=0.05, "*", "ns")))
Conc_pwc.kit
## # A tibble: 6 × 14
##   RNAisolation .y.   group1 group2    n1    n2     p p.signif p.adj p.adj.signif
##   <fct>        <chr> <chr>  <chr>  <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 miRNeasySPk… Endo… Citra… EDTA      15    15 0.923 ns       0.963 ns          
## 2 miRNeasySPk… Endo… Citra… serum     15    15 0.814 ns       0.95  ns          
## 3 miRNeasySPk… Endo… EDTA   serum     15    15 0.89  ns       0.999 ns          
## 4 QIAamp       Endo… Citra… EDTA      15    15 0.644 ns       0.963 ns          
## 5 QIAamp       Endo… Citra… serum     15    15 0.482 ns       0.95  ns          
## 6 QIAamp       Endo… EDTA   serum     15    15 0.247 ns       0.999 ns          
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
conc.kit_plot<-ggplot(conc_stat, aes(x = Tube, y = EndovsSeq, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(RNAisolation)) +
  stat_pvalue_manual(Conc_pwc.kit, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "RNA concentration")

conc.kit_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

At timepoint 16h, we see a significantly higher endogenous/Sequin ratio in EDTA compared to other tubes.

4.2 Relative RNA concentration in eluate: Endogenous vs ERCC

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

EndovsERCC_m1<-lme(EndovsERCC~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_m1)
##                             numDF denDF  F-value p-value
## (Intercept)                     1    68 115.6090  <.0001
## Tube                            2    68   1.6995  0.1905
## RNAisolation                    1    68 491.2657  <.0001
## TimeLapse                       2    68   3.4864  0.0362
## Tube:RNAisolation               2    68   2.2712  0.1110
## Tube:TimeLapse                  4    68   2.8336  0.0310
## RNAisolation:TimeLapse          2    68   3.1217  0.0505
## Tube:RNAisolation:TimeLapse     4    68   1.4079  0.2407

Borderline significant tube-time interaction. There is no third-order interaction with RNA isolation kit. Next, we look at second-order interactions:

EndovsERCC_m2<-lme(EndovsERCC~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_m2)
##                        numDF denDF  F-value p-value
## (Intercept)                1    72 114.6572  <.0001
## Tube                       2    72   1.6950  0.1908
## RNAisolation               1    72 475.7598  <.0001
## TimeLapse                  2    72   3.3753  0.0397
## Tube:RNAisolation          2    72   2.1996  0.1182
## Tube:TimeLapse             4    72   2.7698  0.0336
## RNAisolation:TimeLapse     2    72   3.0232  0.0549

Finally, we only include the tube-time interaction in the model.

EndovsERCC_m3<-lme(EndovsERCC~Tube+RNAisolation+TimeLapse+
                     Tube:TimeLapse,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_m3)
##                numDF denDF  F-value p-value
## (Intercept)        1    76 111.2121  <.0001
## Tube               2    76   1.6898  0.1914
## RNAisolation       1    76 423.9376  <.0001
## TimeLapse          2    76   3.0173  0.0548
## Tube:TimeLapse     4    76   2.5662  0.0448

The normality of the residuals is checked.

qqnorm(EndovsERCC_m3$residuals)
qqline(EndovsERCC_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsERCC_m.time<-emmeans(EndovsERCC_m3, ~TimeLapse|Tube)
plot(EndovsERCC_m.time, comparisons=TRUE)

pairs(EndovsERCC_m.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      8.45 21.8 76   0.388  0.9203
##  T0 - T16    -33.35 21.8 76  -1.533  0.2813
##  T04 - T16   -41.80 21.8 76  -1.922  0.1396
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    -12.18 21.8 76  -0.560  0.8415
##  T0 - T16    -67.48 21.8 76  -3.103  0.0075
##  T04 - T16   -55.30 21.8 76  -2.542  0.0345
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     10.10 21.8 76   0.464  0.8882
##  T0 - T16     24.06 21.8 76   1.106  0.5133
##  T04 - T16    13.96 21.8 76   0.642  0.7976
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(EndovsERCC_m.time,adjust="mvt")
## Tube = Citrate:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           216 19.5  4      145      288
##  T04          208 19.5  4      136      279
##  T16          249 19.5  4      178      321
## 
## Tube = EDTA:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           198 19.5  4      126      270
##  T04          210 19.5  4      138      282
##  T16          265 19.5  4      194      337
## 
## Tube = serum:
##  TimeLapse emmean   SE df lower.CL upper.CL
##  T0           256 19.5  4      184      328
##  T04          246 19.5  4      174      317
##  T16          232 19.5  4      160      303
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

The higher endogenous/ERCC ratio in EDTA at timepoint 16h is hardly significant.

4.3 RNA extraction efficiency

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

EndovsERCC_InputCorr_m1<-lme(EndovsERCC_InputCorr~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_InputCorr_m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68 185.61122  <.0001
## Tube                            2    68   1.77026  0.1780
## RNAisolation                    1    68  57.30531  <.0001
## TimeLapse                       2    68   7.25478  0.0014
## Tube:RNAisolation               2    68   2.25556  0.1126
## Tube:TimeLapse                  4    68   3.69286  0.0088
## RNAisolation:TimeLapse          2    68   0.28284  0.7545
## Tube:RNAisolation:TimeLapse     4    68   0.87872  0.4814
anova_EndovsERCC_InputCorr_m1 <- round(anova(EndovsERCC_InputCorr_m1)[c(5, 6, 7), c(4)], digits = 3)

The model indicates a significant tube-time interaction. There is no third-order interaction with RNA isolation kit. Next, we look at second-order interactions:

EndovsERCC_InputCorr_m2<-lme(EndovsERCC_InputCorr~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_InputCorr_m2)
##                        numDF denDF   F-value p-value
## (Intercept)                1    72 185.46415  <.0001
## Tube                       2    72   1.76938  0.1778
## RNAisolation               1    72  57.97366  <.0001
## TimeLapse                  2    72   7.26638  0.0013
## Tube:RNAisolation          2    72   2.28187  0.1094
## Tube:TimeLapse             4    72   3.70089  0.0085
## RNAisolation:TimeLapse     2    72   0.28614  0.7520

Again, we see a significant tube-time interaction. Finally, we only include the tube-time interaction in the model.

EndovsERCC_InputCorr_m3<-lme(EndovsERCC_InputCorr~Tube+RNAisolation+TimeLapse+
             Tube:TimeLapse,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=conc_stat)

anova(EndovsERCC_InputCorr_m3)
##                numDF denDF   F-value p-value
## (Intercept)        1    76 185.77141  <.0001
## Tube               2    76   1.77126  0.1771
## RNAisolation       1    76  56.57807  <.0001
## TimeLapse          2    76   7.24223  0.0013
## Tube:TimeLapse     4    76   3.68416  0.0085

Check of normality of the residuals.

qqnorm(EndovsERCC_InputCorr_m3$residuals)
qqline(EndovsERCC_InputCorr_m3$residuals)

For all tubes, the effect of time is investigated. The data is collapsed over RNA isolation method.

EndovsERCC_InputCorr_m.time<-emmeans(EndovsERCC_InputCorr_m3, ~TimeLapse|Tube)
plot(EndovsERCC_InputCorr_m.time, comparisons=TRUE)

pairs(EndovsERCC_InputCorr_m.time)
## Tube = Citrate:
##  contrast  estimate    SE df t.ratio p.value
##  T0 - T04    0.3695 0.326 76   1.133  0.4970
##  T0 - T16   -0.8132 0.326 76  -2.493  0.0390
##  T04 - T16  -1.1827 0.326 76  -3.626  0.0015
## 
## Tube = EDTA:
##  contrast  estimate    SE df t.ratio p.value
##  T0 - T04   -0.5219 0.326 76  -1.600  0.2518
##  T0 - T16   -1.2617 0.326 76  -3.868  0.0007
##  T04 - T16  -0.7398 0.326 76  -2.268  0.0666
## 
## Tube = serum:
##  contrast  estimate    SE df t.ratio p.value
##  T0 - T04   -0.0859 0.326 76  -0.263  0.9625
##  T0 - T16    0.1052 0.326 76   0.322  0.9443
##  T04 - T16   0.1911 0.326 76   0.586  0.8281
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(EndovsERCC_InputCorr_m.time,adjust="mvt")
## Tube = Citrate:
##  TimeLapse emmean    SE df lower.CL upper.CL
##  T0          4.24 0.356  4     2.97     5.51
##  T04         3.87 0.356  4     2.60     5.14
##  T16         5.05 0.356  4     3.78     6.32
## 
## Tube = EDTA:
##  TimeLapse emmean    SE df lower.CL upper.CL
##  T0          3.44 0.356  4     2.17     4.71
##  T04         3.96 0.356  4     2.70     5.23
##  T16         4.70 0.356  4     3.43     5.97
## 
## Tube = serum:
##  TimeLapse emmean    SE df lower.CL upper.CL
##  T0          4.28 0.356  4     3.01     5.55
##  T04         4.36 0.356  4     3.09     5.63
##  T16         4.17 0.356  4     2.90     5.44
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

Both in the EDTA en citrate tubes there seems to be a significantly higher extraction efficiency at timepoint 16h.

EndovsERCC_m.kit<-emmeans(EndovsERCC_InputCorr_m3, ~Tube|RNAisolation)
p_val.kit_EndovsERCC <- formatC(summary(pairs(EndovsERCC_m.kit))$p.value, format="e", 2)

EndovsERCC_pwc.kit <-  conc_stat %>% group_by(RNAisolation) %>% pairwise_t_test(EndovsERCC ~ Tube) %>% add_xy_position(x = "Tube")

EndovsERCC_pwc.kit$p.adj <- p_val.kit_EndovsERCC
EndovsERCC_pwc.kit$p.adj <- as.numeric(EndovsERCC_pwc.kit$p.adj)
EndovsERCC_pwc.kit$p.adj.signif <- ifelse(EndovsERCC_pwc.kit$p.adj <= 0.001, "***", ifelse(EndovsERCC_pwc.kit$p.adj<=0.01, "**", ifelse(EndovsERCC_pwc.kit$p.adj<=0.05, "*", "ns")))
EndovsERCC_pwc.kit
## # A tibble: 6 × 14
##   RNAisolation .y.   group1 group2    n1    n2     p p.signif p.adj p.adj.signif
##   <fct>        <chr> <chr>  <chr>  <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 miRNeasySPk… Endo… Citra… EDTA      15    15 0.53  ns       0.162 ns          
## 2 miRNeasySPk… Endo… Citra… serum     15    15 0.071 ns       0.82  ns          
## 3 miRNeasySPk… Endo… EDTA   serum     15    15 0.229 ns       0.431 ns          
## 4 QIAamp       Endo… Citra… EDTA      15    15 0.157 ns       0.162 ns          
## 5 QIAamp       Endo… Citra… serum     15    15 0.484 ns       0.82  ns          
## 6 QIAamp       Endo… EDTA   serum     15    15 0.468 ns       0.431 ns          
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
EndovsERCC.kit_plot<-ggplot(conc_stat, aes(x = Tube, y = EndovsERCC, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(RNAisolation)) +
  stat_pvalue_manual(EndovsERCC_pwc.kit, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "extraction efficiency")

EndovsERCC.kit_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

EndovsERCC_m.time<-emmeans(EndovsERCC_InputCorr_m3, ~Tube|TimeLapse)
p_val.time_EndovsERCC <- formatC(summary(pairs(EndovsERCC_m.time))$p.value, format="e", 2)

EndovsERCC_pwc.time <-  conc_stat %>% group_by(TimeLapse) %>% pairwise_t_test(EndovsERCC ~ Tube) %>% add_xy_position(x = "Tube")

EndovsERCC_pwc.time$p.adj <- p_val.time_EndovsERCC
EndovsERCC_pwc.time$p.adj <- as.numeric(EndovsERCC_pwc.time$p.adj)
EndovsERCC_pwc.time$p.adj.signif <- ifelse(EndovsERCC_pwc.time$p.adj <= 0.001, "***", ifelse(EndovsERCC_pwc.time$p.adj<=0.01, "**", ifelse(EndovsERCC_pwc.time$p.adj<=0.05, "*", "ns")))
EndovsERCC_pwc.time
## # A tibble: 9 × 14
##   TimeLapse .y.     group1 group2    n1    n2     p p.signif  p.adj p.adj.signif
##   <fct>     <chr>   <chr>  <chr>  <int> <int> <dbl> <chr>     <dbl> <chr>       
## 1 T0        Endovs… Citra… EDTA      10    10 0.402 ns       0.045  *           
## 2 T0        Endovs… Citra… serum     10    10 0.791 ns       0.991  ns          
## 3 T0        Endovs… EDTA   serum     10    10 0.273 ns       0.0329 *           
## 4 T04       Endovs… Citra… EDTA      10    10 0.922 ns       0.952  ns          
## 5 T04       Endovs… Citra… serum     10    10 0.526 ns       0.286  ns          
## 6 T04       Endovs… EDTA   serum     10    10 0.591 ns       0.443  ns          
## 7 T16       Endovs… Citra… EDTA      10    10 0.854 ns       0.542  ns          
## 8 T16       Endovs… Citra… serum     10    10 0.467 ns       0.0236 *           
## 9 T16       Endovs… EDTA   serum     10    10 0.585 ns       0.239  ns          
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
EndovsERCC.time_plot<-ggplot(conc_stat, aes(x = Tube, y = EndovsERCC, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(TimeLapse)) +
  stat_pvalue_manual(EndovsERCC_pwc.time, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "extraction efficiency")

EndovsERCC.time_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

5 Gene count

5.1 absolute numbers

gene_stat <- read.csv('./data_output/number_genes_detected.csv',sep=",",header = TRUE)

gene_stat$Tube<-as.factor(gene_stat$Tube)
gene_stat$RNAisolation<-as.factor(gene_stat$RNAisolation)
gene_stat$TimeLapse<-as.factor(gene_stat$TimeLapse)
gene_stat$donorID<-as.factor(gene_stat$donorID)

Tube.levels<-levels(gene_stat$Tube)
RNAisolation.levels<-levels(gene_stat$RNAisolation)
TimeLapse.levels<-levels(gene_stat$TimeLapse)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

count_m1<-lme(genes_aboveTh~Tube*RNAisolation*TimeLapse,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=gene_stat)

anova(count_m1)
##                             numDF denDF   F-value p-value
## (Intercept)                     1    68 313.30761  <.0001
## Tube                            2    68  17.80049  <.0001
## RNAisolation                    1    68 280.31382  <.0001
## TimeLapse                       2    68   0.30821  0.7358
## Tube:RNAisolation               2    68   5.67813  0.0052
## Tube:TimeLapse                  4    68   2.00919  0.1030
## RNAisolation:TimeLapse          2    68   0.95604  0.3895
## Tube:RNAisolation:TimeLapse     4    68   1.38852  0.2472
anova_count_m1 <- round(anova(count_m1)[c(5, 6, 7), c(4)], digits = 3)

There seems to be a significant tube-RNA isolation kit interaction. There is no third-order interaction with time. Next, we look at second-order interactions:

count_m2<-lme(genes_aboveTh~(Tube+RNAisolation+TimeLapse)^2,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=gene_stat)

anova(count_m2)
##                        numDF denDF   F-value p-value
## (Intercept)                1    72 313.01274  <.0001
## Tube                       2    72  17.38210  <.0001
## RNAisolation               1    72 274.31953  <.0001
## TimeLapse                  2    72   0.30091  0.7411
## Tube:RNAisolation          2    72   5.55671  0.0057
## Tube:TimeLapse             4    72   1.96416  0.1091
## RNAisolation:TimeLapse     2    72   0.93559  0.3971

Finally, we only include the tube-kit interaction in the model.

count_m3<-lme(genes_aboveTh~Tube+RNAisolation+TimeLapse+
             Tube:RNAisolation,
       random=~1|donorID,
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=gene_stat)

anova(count_m3)
##                   numDF denDF   F-value p-value
## (Intercept)           1    78 306.12500  <.0001
## Tube                  2    78  15.71216  <.0001
## RNAisolation          1    78 260.15319  <.0001
## TimeLapse             2    78   0.27352  0.7614
## Tube:RNAisolation     2    78   5.26975  0.0071

Check of normality of the residuals.

qqnorm(count_m3$residuals)
qqline(count_m3$residuals)

For both RNA isolation methods, the effect of tube is investigated. The data is collapsed over time.

count_m.methods<-emmeans(count_m3, ~Tube|RNAisolation)
plot(count_m.methods, comparisons=TRUE)

pairs(count_m.methods)
## RNAisolation = miRNeasySPkit:
##  contrast        estimate  SE df t.ratio p.value
##  Citrate - EDTA        44 281 78   0.157  0.9866
##  Citrate - serum      575 281 78   2.046  0.1080
##  EDTA - serum         531 281 78   1.890  0.1484
## 
## RNAisolation = QIAamp:
##  contrast        estimate  SE df t.ratio p.value
##  Citrate - EDTA       649 334 78   1.944  0.1332
##  Citrate - serum     1986 334 78   5.947  <.0001
##  EDTA - serum        1337 334 78   4.002  0.0004
## 
## Results are averaged over the levels of: TimeLapse 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
confint(count_m.methods, adjust="mvt")
## RNAisolation = miRNeasySPkit:
##  Tube    emmean  SE df lower.CL upper.CL
##  Citrate   4889 380  4     3578     6201
##  EDTA      4845 380  4     3534     6157
##  serum     4315 380  4     3004     5626
## 
## RNAisolation = QIAamp:
##  Tube    emmean  SE df lower.CL upper.CL
##  Citrate   8435 401  4     7027     9843
##  EDTA      7786 401  4     6377     9194
##  serum     6449 401  4     5041     7857
## 
## Results are averaged over the levels of: TimeLapse 
## Degrees-of-freedom method: containment 
## Confidence level used: 0.95 
## Conf-level adjustment: mvt method for 3 estimates

Serum has a significantly lower absolute count of protein-coding genes, but only if extracted with the QIAamp kit.

Pairwise T-test within kits

# Pairwise comparisons
gene_pwc <- gene_stat %>% group_by(RNAisolation) %>% pairwise_t_test(genes_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
ggplot(gene_stat, aes(x = Tube, y = genes_aboveTh, color = Tube)) +
  geom_boxplot()+
  geom_point()+
  scale_colour_manual(values=color_panel)+
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(gene_pwc, label = "p.signif") +
  labs(title = "tube comparison within kits (T test)", y = "absolute count of genes above threshold")

#ggsave("./data_output/plots/duplicates_stats.pdf", plot = ggplot2::last_plot(), dpi = 300)
count_m.time<-emmeans(count_m3, ~Tube|TimeLapse)
p_val.time_count <- formatC(summary(pairs(count_m.time))$p.value, format="e", 2)

count_pwc.time <-  gene_stat %>% group_by(TimeLapse) %>% pairwise_t_test(genes_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")

count_pwc.time$p.adj <- p_val.time_count
count_pwc.time$p.adj <- as.numeric(count_pwc.time$p.adj)
count_pwc.time$p.adj.signif <- ifelse(count_pwc.time$p.adj <= 0.001, "***", ifelse(count_pwc.time$p.adj<=0.01, "**", ifelse(count_pwc.time$p.adj<=0.05, "*", "ns")))

count_pwc.time
## # A tibble: 9 × 14
##   TimeLapse .y.   group1 group2    n1    n2      p p.signif   p.adj p.adj.signif
##   <fct>     <chr> <chr>  <chr>  <int> <int>  <dbl> <chr>      <dbl> <chr>       
## 1 T0        gene… Citra… EDTA      10    10 0.27   ns       2.56e-1 ns          
## 2 T0        gene… Citra… serum     10    10 0.147  ns       3.02e-7 ***         
## 3 T0        gene… EDTA   serum     10    10 0.718  ns       1.55e-4 ***         
## 4 T04       gene… Citra… EDTA      10    10 0.924  ns       2.56e-1 ns          
## 5 T04       gene… Citra… serum     10    10 0.315  ns       3.02e-7 ***         
## 6 T04       gene… EDTA   serum     10    10 0.273  ns       1.55e-4 ***         
## 7 T16       gene… Citra… EDTA      10    10 0.774  ns       2.56e-1 ns          
## 8 T16       gene… Citra… serum     10    10 0.0483 *        3.02e-7 ***         
## 9 T16       gene… EDTA   serum     10    10 0.0866 ns       1.55e-4 ***         
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
count.time_plot<-ggplot(gene_stat, aes(x = Tube, y = genes_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(TimeLapse)) +
  stat_pvalue_manual(count_pwc.time, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "sensitivity")

count.time_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

count_m.kit<-emmeans(count_m3, ~Tube|RNAisolation)
p_val.kit_count <- formatC(summary(pairs(count_m.kit))$p.value, format="e", 2)

count_pwc.kit <-  gene_stat %>% group_by(RNAisolation) %>% pairwise_t_test(genes_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")

count_pwc.kit$p.adj <- p_val.kit_count
count_pwc.kit$p.adj <- as.numeric(count_pwc.kit$p.adj)
count_pwc.kit$p.adj.signif <- ifelse(count_pwc.kit$p.adj <= 0.001, "***", ifelse(count_pwc.kit$p.adj<=0.01, "**", ifelse(count_pwc.kit$p.adj<=0.05, "*", "ns")))

count_pwc.kit
## # A tibble: 6 × 14
##   RNAisolation  .y.           group1 group2    n1    n2       p p.signif   p.adj
##   <fct>         <chr>         <chr>  <chr>  <int> <int>   <dbl> <chr>      <dbl>
## 1 miRNeasySPkit genes_aboveTh Citra… EDTA      15    15 9.04e-1 ns       9.87e-1
## 2 miRNeasySPkit genes_aboveTh Citra… serum     15    15 1.21e-1 ns       1.08e-1
## 3 miRNeasySPkit genes_aboveTh EDTA   serum     15    15 1.52e-1 ns       1.48e-1
## 4 QIAamp        genes_aboveTh Citra… EDTA      15    15 1.3 e-1 ns       1.33e-1
## 5 QIAamp        genes_aboveTh Citra… serum     15    15 2.63e-5 ****     2.18e-7
## 6 QIAamp        genes_aboveTh EDTA   serum     15    15 2.79e-3 **       4.13e-4
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## #   groups <named list>, xmin <dbl>, xmax <dbl>
count.kit_plot<-ggplot(gene_stat, aes(x = Tube, y = genes_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(RNAisolation)) +
  stat_pvalue_manual(count_pwc.kit, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "sensitivity")

count.kit_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

6 Area left of the curve (ALC)

ALC <- read.csv('./data_output/ALC.csv',sep=",",header = TRUE)

ALC[c("Tube","RNAisolation")] <- str_split_fixed(ALC$tube_kitID, "_", 2)
ALC$Tube <- if_else(ALC$Tube=="CIT", "Citrate", if_else(ALC$Tube=="SER", "serum", "EDTA")) 
ALC$Tube <- as.factor(ALC$Tube)
ALC$RNAisolation <- as.factor(ALC$RNAisolation)
ALC$TimePoint <- as.factor(ALC$TimePoint)
ALC$BiologicalReplicate <- as.factor(ALC$BiologicalReplicate)
ALC <- na.omit(ALC)

Linear mixed-effects model with crossed fixed effects of tube, kit and timelapse.

ALC_m1<-lme(ALC_calc~Tube*RNAisolation*TimePoint,
       random=~1|BiologicalReplicate, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=ALC)

anova(ALC_m1)
##                             numDF denDF  F-value p-value
## (Intercept)                     1    44 776.6554  <.0001
## Tube                            2    44   2.8118  0.0709
## RNAisolation                    1    44  10.8255  0.0020
## TimePoint                       1    44   0.1389  0.7112
## Tube:RNAisolation               2    44   0.2600  0.7723
## Tube:TimePoint                  2    44   0.4628  0.6325
## RNAisolation:TimePoint          1    44   0.0370  0.8484
## Tube:RNAisolation:TimePoint     2    44   0.4545  0.6377
anova_ALC_m1 <- round(anova(ALC_m1)[c(5, 6, 7), c(4)], digits = 3)

Next, we look at second-order interactions:

ALC_m2<-lme(ALC_calc~(Tube+RNAisolation+TimePoint)^2,
       random=~1|BiologicalReplicate, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=ALC)

anova(ALC_m2)
##                        numDF denDF  F-value p-value
## (Intercept)                1    46 767.8574  <.0001
## Tube                       2    46   2.8738  0.0667
## RNAisolation               1    46  11.1645  0.0017
## TimePoint                  1    46   0.1428  0.7072
## Tube:RNAisolation          2    46   0.2681  0.7660
## Tube:TimePoint             2    46   0.4647  0.6312
## RNAisolation:TimePoint     1    46   0.0382  0.8460

There are no significant interactions.

Check of normality of the residuals.

qqnorm(ALC_m1$residuals)
qqline(ALC_m1$residuals)

ALC_m.time<-emmeans(ALC_m2, ~Tube|TimePoint)
p_val.time_ALC <- formatC(summary(pairs(ALC_m.time))$p.value, format="e", 2)

ALC_pwc.time <-  ALC %>% group_by(TimePoint) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")

ALC_pwc.time$p.adj <- p_val.time_ALC
ALC_pwc.time$p.adj <- as.numeric(ALC_pwc.time$p.adj)
ALC_pwc.time$p.adj.signif <- ifelse(ALC_pwc.time$p.adj <= 0.001, "***", ifelse(ALC_pwc.time$p.adj<=0.01, "**", ifelse(ALC_pwc.time$p.adj<=0.05, "*", "ns")))

ALC_pwc.time
## # A tibble: 6 × 14
##   TimePoint   .y.   group1 group2    n1    n2      p p.signif p.adj p.adj.signif
##   <fct>       <chr> <chr>  <chr>  <int> <int>  <dbl> <chr>    <dbl> <chr>       
## 1 1st timepo… ALC_… Citra… EDTA      10    10 0.282  ns       0.32  ns          
## 2 1st timepo… ALC_… Citra… serum     10    10 0.803  ns       0.999 ns          
## 3 1st timepo… ALC_… EDTA   serum     10    10 0.405  ns       0.346 ns          
## 4 2nd timepo… ALC_… Citra… EDTA      10    10 0.0709 ns       0.109 ns          
## 5 2nd timepo… ALC_… Citra… serum     10    10 0.383  ns       0.407 ns          
## 6 2nd timepo… ALC_… EDTA   serum     10    10 0.33   ns       0.723 ns          
## # ℹ 4 more variables: y.position <dbl>, groups <named list>, xmin <dbl>,
## #   xmax <dbl>
ALC.time_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(TimePoint)) +
  stat_pvalue_manual(ALC_pwc.time, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "reproducibility")

ALC.time_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

ALC_m.kit<-emmeans(ALC_m2, ~Tube|RNAisolation)
p_val.kit_ALC <- formatC(summary(pairs(ALC_m.kit))$p.value, format="e", 2)

ALC_pwc.kit <-  ALC %>% group_by(RNAisolation) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")

ALC_pwc.kit$p.adj <- p_val.kit_ALC
ALC_pwc.kit$p.adj <- as.numeric(ALC_pwc.kit$p.adj)
ALC_pwc.kit$p.adj.signif <- ifelse(ALC_pwc.kit$p.adj <= 0.001, "***", ifelse(ALC_pwc.kit$p.adj<=0.01, "**", ifelse(ALC_pwc.kit$p.adj<=0.05, "*", "ns")))

ALC_pwc.kit
## # A tibble: 6 × 14
##   RNAisolation .y.      group1  group2    n1    n2      p p.signif p.adj
##   <fct>        <chr>    <chr>   <chr>  <int> <int>  <dbl> <chr>    <dbl>
## 1 CCF2         ALC_calc Citrate EDTA      10    10 0.118  ns       0.253
## 2 CCF2         ALC_calc Citrate serum     10    10 0.693  ns       0.917
## 3 CCF2         ALC_calc EDTA    serum     10    10 0.234  ns       0.454
## 4 MIR0.2       ALC_calc Citrate EDTA      10    10 0.0725 ns       0.164
## 5 MIR0.2       ALC_calc Citrate serum     10    10 0.275  ns       0.515
## 6 MIR0.2       ALC_calc EDTA    serum     10    10 0.457  ns       0.736
## # ℹ 5 more variables: p.adj.signif <chr>, y.position <dbl>,
## #   groups <named list>, xmin <dbl>, xmax <dbl>
ALC.kit_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color=Tube)) +
  geom_boxplot() +
  geom_point(size=0.8) +
  facet_wrap(~ as.factor(RNAisolation)) +
  stat_pvalue_manual(ALC_pwc.kit, label = "{scales::pvalue(p.adj)}", hide.ns = T, size=3, bracket.size=0.2, tip.length = 0.01) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point2 + 
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = 'none', axis.title.y = element_text(size=10))+
  labs(y = "reproducibility")

ALC.kit_plot
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

#

full_kit2<-ggarrange(Dupl.kit_plot, conc.kit_plot, EndovsERCC.kit_plot, count.kit_plot, ALC.kit_plot, common.legend = T, align = c("hv"), ncol = 1, nrow = 5)
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
ggsave("./data_output/Kits_full2.pdf", plot=full_kit2, dpi=300, height = 15, width = 4)

full_time <- ggarrange(Dupl.time_plot, conc.Time_plot, EndovsERCC.time_plot, count.time_plot, ALC.time_plot, common.legend = T, align = c("hv"), ncol = 1, nrow = 5)
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
ggsave("./data_output/Time_full.pdf", plot=full_time, dpi=300, height = 15, width = 6)


full_plot <- ggarrange(full_kit2, full_time, ncol=2, common.legend = T)
ggsave("./mRNA_Boxplots_full.pdf", plot=full_plot, dpi=300, height = 15, width = 8)

7 conclusion

For duplicates and gene count, we see a tube-kit interaction. For both metrics the serum tube combined with the QIAamp extraction kit seems to perform worse than the other combinations. For RNA concentration and extraction efficiency, there seems to be a tube-time interaction. The EDTA tube at timepoint 16h performs significantly better than other combinations. In case of Extraction efficiency, this is also the case with the citrate tube.

overview <- data.frame(row.names = c("Tube:RNAisolation","Tube:TimeLapse","RNAisolation:TimeLapse"), "duplicates"= anova_dupl_m1, "RNA conc (endoVSseq)" = anova_EndovsSeq_m1, "extraction efficiency" = anova_EndovsERCC_InputCorr_m1, "gene count" = anova_count_m1, "ALC" = anova_ALC_m1, check.names = FALSE)
overview_t <- t(overview)

#breaks for heatmap
bk1 <- c(seq(0,0.051,by=0.005))

#make color palette
custom_palette = c(colorRampPalette(brewer.pal(n = 7, name =
  "RdBu"))(20))
custom_palette2 = c(custom_palette[1:10], 'white')

#make matrix with labels for heatmap
labels_tab <- overview_t
labels_tab[labels_tab<0.001]<-"<0.001"

#make heatmap
phm<-pheatmap::pheatmap(overview_t,
                   breaks = bk1,
                   display_numbers = labels_tab,fontsize_number=11, number_color="black",
                   color = custom_palette2,
                   angle_col = 45,
                   cluster_rows=FALSE, cluster_cols=FALSE)

ggsave("./mRNAInteractions2.pdf", plot = phm, dpi = 300)
## Saving 7 x 5 in image